rm(list =ls())
options(scipen=999)
gc()
packages <-c("tidyverse","estimatr","plm","stargazer",
             "fastDummies","ICCbin","ihs","readstata13","xtable",
             "arm")

new.packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)

lapply(packages, require, character.only = TRUE)
rm(packages, new.packages)

setwd("PUT YOUR DIRECTORY HERE")

## insheet data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
data$locality <- as.character(tolower(data$locality))

## specify controls
controls <- c("locality", "region", "pop", "schools", "shops", "restaurants", "cinemas",
              "Frac48", "coal_sqm", "coal_mines", "minerals", "partition", "priests_continuous",
              "pop", "protests_40s", "sabotage_40s","terror_40s", "income_76_capita", 
              "income_77_capita")

## load controls
data <- merge(data, read.csv("./Datasets/main_confounders.csv")[,controls], by = c("locality"), all.x = T)
data$region_numeric <- as.numeric(as.factor(data$region))
rm(controls)

## add Prussian census controls
data <- merge(data, read.csv("./Datasets/prussia_clean.csv"), by = c("locality"), all.x = T)


######################
### Protest models ###
######################

## Focus on years with observations
data_t <- data[which(data$year>1979),]

### clean covariates

## create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth <- scale(data_t$wealth_index)

# scale state capacity
data_t$state_capacity <- scale(data_t$schools)

# scale ethnic diversity
data_t$ethnic_diversity <- scale(data_t$Frac48)

# scale Russian occupation
data_t$Russian_occupation <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation <- scale(data_t$Russian_occupation)

# scale industrialization
data_t$industrialization <- scale(data_t$minerals)

# scale grievance
data_t$grievance <- scale(data_t$coal_sqm)

# scale grievance II
data_t$grievance_ii <- scale(data_t$coal_mines)

# scale protests 40s
data_t$protests_40s <- scale(data_t$protests_40s)

# scale sabotage 40s
data_t$sabotage_40s <- scale(data_t$sabotage_40s)

# scale terror 40s
data_t$terror_40s <- scale(data_t$terror_40s)

# scale incomes
data_t$income_76_capita <- scale(data_t$income_76_capita)
data_t$income_77_capita <- scale(data_t$income_77_capita)
data_t$income <- rowSums(data_t[,c("income_76_capita", "income_77_capita")], na.rm = T)
data_t$income <- scale(data_t$income)

# scale jews
data_t$jew_perc[is.na(data_t$jew_perc)] = mean(data_t$jew_perc, na.rm=TRUE)
data_t$jew_perc <- scale(data_t$jew_perc)

# turn subbotnik upside down, to mean sabotage
data_t$subbotnik_inkind_ <- data_t$subbotnik_inkind_*-1

# capture all confounders
confounders <- c("wealth", "income", "state_capacity", "ethnic_diversity", "Russian_occupation", 
                 "industrialization", "grievance", "grievance_ii", "protests_40s",
                 "sabotage_40s", "terror_40s", "jew_perc", "pop")


## average across all years in panel (because covariates are constant)
all_vars <- c(c("strikes", "subbotnik_inkind_", "commanders", "region_numeric"), confounders)
data_agg <- aggregate(data_t[,all_vars], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)

## run model
m_strikes_ols_averaged <- lm(paste("scale(strikes) ~ commanders + factor(region_numeric) + ", paste(confounders, collapse=" + ")), data = data_agg)
summary(m_strikes_ols_averaged)









###########################
### RUN SABOTAGE MODELS ###
###########################

## Focus on years with observations
data_t <- data[which(data$year<1980),]

### clean controls

## create wealth index
data_t$shops <- scale(data_t$shops)
data_t$restaurants <- scale(data_t$restaurants)
data_t$cinemas <- scale(data_t$cinemas)
data_t$wealth_index <- rowSums(data_t[,c("shops","restaurants","cinemas")], na.rm = T)
head(data_t[,c("shops", "restaurants", "cinemas", "wealth_index")], 50)
data_t$wealth_index[data_t$wealth_index == "NaN"] = NA  
data_t$wealth <- scale(data_t$wealth_index)

# scale state capacity
data_t$state_capacity <- scale(data_t$schools)

# scale ethnic diversity
data_t$ethnic_diversity <- scale(data_t$Frac48)

# scale Russian occupation
data_t$Russian_occupation <- ifelse(data_t$partition=="Russian",1,0)
data_t$Russian_occupation <- scale(data_t$Russian_occupation)

# scale industrialization
data_t$industrialization <- scale(data_t$minerals)

# scale grievance
data_t$grievance <- scale(data_t$coal_sqm)

# scale grievance II
data_t$grievance_ii <- scale(data_t$coal_mines)

# scale protests 40s
data_t$protests_40s <- scale(data_t$protests_40s)

# scale sabotage 40s
data_t$sabotage_40s <- scale(data_t$sabotage_40s)

# scale terror 40s
data_t$terror_40s <- scale(data_t$terror_40s)

# scale incomes
data_t$income_76_capita <- scale(data_t$income_76_capita)
data_t$income_77_capita <- scale(data_t$income_77_capita)
data_t$income <- rowSums(data_t[,c("income_76_capita", "income_77_capita")], na.rm = T)
data_t$income <- scale(data_t$income)

# scale jews
data_t$jew_perc[is.na(data_t$jew_perc)] = mean(data_t$jew_perc, na.rm=TRUE)
data_t$jew_perc <- scale(data_t$jew_perc)

# turn subbotnik upside down, to mean sabotage
data_t$subbotnik_inkind_ <- data_t$subbotnik_inkind_*-1

# capture all confounders
confounders <- c("wealth", "income", "state_capacity", "ethnic_diversity", "Russian_occupation", 
                 "industrialization", "grievance", "grievance_ii", "protests_40s",
                 "sabotage_40s", "terror_40s", "jew_perc", "pop")


## average across all years in panel (because covariates are constant)
all_vars <- c(c("strikes", "subbotnik_inkind_", "commanders", "region_numeric"), confounders)
data_agg <- aggregate(data_t[,all_vars], by=list(Category=data_t$locality_numeric), FUN=mean, na.rm=T)

m_subbotnik_inkind__ols_averaged <- lm(paste("scale(subbotnik_inkind_) ~ commanders  + factor(region_numeric) + ", paste(confounders, collapse=" + ")), data = data_agg)
summary(m_subbotnik_inkind__ols_averaged)





###################
### Output data ###
###################

stargazer(m_strikes_ols_averaged, m_subbotnik_inkind__ols_averaged, 
          dep.var.labels = c("Protest (z)", "Sabotage (z)"),
          style = "qje",
          covariate.labels = c("Surveillance", 
                               "Wealth", 
                               "Income",
                               "State capacity", 
                               "Ethnic diversity", 
                               "Russian occupation", 
                               "Industrialization", 
                               "Grievances (coal)",
                               "Grievances (mines)",
                               "Protests (40s)",
                               "Sabotage (40s)",
                               "Terror (40s)",
                               "Jews (1871)",
                               "Population"),
          star.char = c("*", "**", "***"),
          star.cutoffs = c(0.1, 0.05, 0.01),
          omit = c("Constant", "factor"),
          omit.stat = c("rsq", "f", "ser"), 
          omit.table.layout = "n",
          add.lines = list(c("Fixed effects", "Yes", "Yes"),
                           c("Controls", "Yes", "Yes")),
          out = "PUT YOUR FILEPATH HERE")




